Each row is a room listed on Airbnb in New York.
# Mean of availability_365 for each neighbourhood group.
neighbourhoodgroup_availability <- ddply(
  df,
  ~neighbourhood_group,
  summarise,
  avg = mean(availability_365)
)

# Plot 1: one bar per neighbourhood group, bar height = mean availability.
ggplot(
  neighbourhoodgroup_availability,
  aes(x = neighbourhood_group, y = avg, fill = neighbourhood_group)
) +
  geom_bar(stat = "identity") +
  scale_fill_discrete(name = "Neighbourhood group") +
  labs(
    x = "Neighbourhood Group",
    y = "Average availability per year",
    title = "Average availability per year against neighbourhood group",
    subtitle = "plot 1"
  )
# Number of listings for every (neighbourhood group, room type) pair.
neighbourhoodgroup_roomType <- ddply(
  df,
  ~neighbourhood_group + room_type,
  summarise,
  count = length(room_type)
)

# Plot 2: stacked bars normalised to 1 (position = "fill"), so each bar shows
# the share of every room type within a neighbourhood group.
# The fill aesthetic is room_type, so the legend is titled "Room type"
# (it was previously mislabelled as "Neighbourhood group").
ggplot(
  neighbourhoodgroup_roomType,
  aes(x = neighbourhood_group, y = count, fill = room_type)
) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_discrete(name = "Room type") +
  labs(
    x = "Neighbourhood Group",
    y = "Proportion of each room type",
    title = "Proportion of room type per neighbourhood group",
    subtitle = "plot 2"
  )
# Mean of availability_365 for each room type.
n <- ddply(
  df,
  ~room_type,
  summarise,
  avgAvailability = mean(availability_365)
)

# Plot 3: one bar per room type, bar height = mean availability.
# The y-axis label spelling is corrected ("avaialability" -> "availability").
ggplot(n, aes(x = room_type, y = avgAvailability, fill = room_type)) +
  geom_bar(stat = "identity") +
  scale_fill_discrete(name = "Room type") +
  labs(
    x = "Room type",
    y = "Average availability per year",
    title = "Average availability by room type",
    subtitle = "plot 3"
  )
# Plot 4: box plots of availability_365 per room type, one panel per
# neighbourhood group. The x-axis text and title are blanked; the room type
# is conveyed through the fill legend instead.
ggplot(df, aes(x = room_type, y = availability_365, fill = room_type)) +
  geom_boxplot() +
  facet_wrap(~neighbourhood_group) +
  theme(
    axis.text.x = element_blank(),
    axis.title.x = element_blank()
  ) +
  labs(
    y = "Availability per year",
    fill = "Room type",
    title = "Availability by room type across neighborhood group",
    subtitle = "plot 4"
  )
We’ll now inspect the relationship between availability per year and price.
# Plot 5: scatter of availability_365 against price with a smoothed trend
# line (geom_smooth defaults).
ggplot(df, aes(x = price, y = availability_365)) +
  geom_point() +
  geom_smooth() +
  labs(
    x = "Price",
    y = "Availability per year",
    title = "Availability against price",
    subtitle = "plot 5"
  )
# Plot 6: availability_365 against price with a smoothed trend line, one
# panel per neighbourhood group.
# The y-axis label spelling is corrected ("Avaialbility" -> "Availability").
ggplot(df, aes(x = price, y = availability_365)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~neighbourhood_group) +
  labs(
    x = "Price",
    y = "Availability per year",
    title = "Availability against price by neighborhood group",
    subtitle = "plot 6"
  )
# Mean price for each neighbourhood group.
neighbourhoodgroup_price <- ddply(
  df,
  ~neighbourhood_group,
  summarise,
  avg = mean(price)
)

# Plot 7: one bar per neighbourhood group, bar height = mean price.
ggplot(
  neighbourhoodgroup_price,
  aes(x = neighbourhood_group, y = avg, fill = neighbourhood_group)
) +
  geom_bar(stat = "identity") +
  scale_fill_discrete(name = "Neighbourhood group") +
  labs(
    x = "Neighbourhood Group",
    y = "Average Price",
    title = "Average price against neighbourhood group",
    subtitle = "plot 7"
  )
# Plot 8: box plots of price per neighbourhood group, one panel per room
# type. The fill legend is suppressed because the facets already separate
# the room types; x labels are rotated 90 degrees.
ggplot(
  data = df,
  aes(x = neighbourhood_group, y = price, fill = room_type)
) +
  geom_boxplot() +
  facet_wrap(~room_type) +
  theme(axis.text.x = element_text(angle = 90)) +
  guides(fill = "none") +
  labs(
    x = "Neighbourhood group",
    y = "price",
    title = "Price by neighborhood group across room type",
    subtitle = "plot 8"
  )
# Listing counts per neighbourhood.
# NOTE(review): `t` is not read again in the visible code - confirm whether
# it was only inspected interactively to choose the names below.
t <- table(df$neighbourhood)

# Hand-picked neighbourhoods to focus on.
# NOTE(review): this vector holds 12 names while the plot title says "ten" -
# confirm which is intended.
top_neighbourhoods <- c(
  "Bedford-Stuyvesant",
  "Bushwick",
  "Crown Heights",
  "Williamsburg",
  "Harlem",
  "Midtown",
  "Upper West Side",
  "Hell's Kitchen",
  "Upper East Side",
  "East Village",
  "Chelsea",
  "Lower East Side"
)

# Keep only the listings located in the selected neighbourhoods.
df_sub <- subset(df, neighbourhood %in% top_neighbourhoods)

# Mean price per neighbourhood (the neighbourhood group is kept for colouring).
n <- ddply(
  df_sub,
  ~neighbourhood_group + neighbourhood,
  summarise,
  avgP = mean(price)
)

# Plot 9: mean price per selected neighbourhood, coloured by its group.
ggplot(n, aes(x = neighbourhood, y = avgP, fill = neighbourhood_group)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    x = "Neighbourhood",
    y = "Average price",
    title = "Average price of ten most expensive neighborhoods",
    subtitle = "plot 9"
  )
# Listing counts per neighbourhood (recomputed so this section runs on its own).
t <- table(df$neighbourhood)

# Same hand-picked neighbourhoods as used for the average-price plot.
top_neighbourhoods <- c(
  "Bedford-Stuyvesant",
  "Bushwick",
  "Crown Heights",
  "Williamsburg",
  "Harlem",
  "Midtown",
  "Upper West Side",
  "Hell's Kitchen",
  "Upper East Side",
  "East Village",
  "Chelsea",
  "Lower East Side"
)

# Keep only the listings located in the selected neighbourhoods.
df_sub <- subset(df, neighbourhood %in% top_neighbourhoods)

# Number of listings per (room type, neighbourhood group, neighbourhood).
df11 <- ddply(
  df_sub,
  ~room_type + neighbourhood_group + neighbourhood,
  summarise,
  count = length(neighbourhood)
)

# Plot 10: listing counts per selected neighbourhood, one panel per room type.
ggplot(df11, aes(x = neighbourhood, y = count, fill = neighbourhood_group)) +
  geom_bar(stat = "identity") +
  facet_wrap(~room_type) +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    x = "Neighbourhood",
    y = "Count",
    fill = "Neighbourhood group",
    title = "Count of room types in ten most expensive neighborhoods",
    subtitle = "plot 10"
  )
Most listings in the top 10 neighborhoods are private rooms and entire homes/apartments; hotel rooms and shared rooms are very few.
To continue our exploration, we wanted to see if instead of the neighborhoods affecting the data it may be a select number of owners affecting the prices.
# Plot 11: price against the host's listing count with a smoothed trend line.
# The preceding sentence used to be fused onto the start of this call
# ("prices.ggplot(..."); it now sits on its own line.
ggplot(df, aes(x = calculated_host_listings_count, y = price)) +
  geom_point() +
  geom_smooth() +
  labs(
    x = "Calculated host listing count",
    y = "Price",
    title = "Price against host listing count",
    subtitle = "plot 11"
  )
# Plot 12: price against the host's listing count with a smoothed trend
# line, one panel per room type.
ggplot(df, aes(x = calculated_host_listings_count, y = price)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~room_type) +
  labs(
    x = "Calculated host listing count",
    y = "Price",
    title = "Price against host listing count by room type",
    subtitle = "plot 12"
  )
# Plot 13: availability_365 against the host's listing count with a smoothed
# trend line.
# The subtitle is set once; a leftover 'plot 12' label that was immediately
# overridden by "plot 13" has been removed.
ggplot(df, aes(x = calculated_host_listings_count, y = availability_365)) +
  geom_point() +
  geom_smooth() +
  labs(
    x = "Calculated host listing count",
    y = "Availability per year",
    title = "Availability against host listing count",
    subtitle = "plot 13"
  )
# Plot 14: availability_365 against the host's listing count with a smoothed
# trend line, one panel per room type.
ggplot(df, aes(x = calculated_host_listings_count, y = availability_365)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~room_type) +
  labs(
    x = "Calculated host listing count",
    y = "Availability per year",
    title = "Availability against host listing count by room type",
    subtitle = "plot 14"
  )
# Plot 15: availability_365 against the number of reviews in the last twelve
# months, with a smoothed trend line.
ggplot(df, aes(x = number_of_reviews_ltm, y = availability_365)) +
  geom_point() +
  geom_smooth() +
  labs(
    x = "Number of reviews last twelve months",
    y = "Availability per year",
    title = "Availability against reviews in last twelve months",
    subtitle = "plot 15"
  )

# Plot 16: the same relationship, one panel per room type.
# NOTE(review): xlim(0, 365) restricts the x axis, which is the review count
# here, not availability - confirm this range is intended.
ggplot(df, aes(x = number_of_reviews_ltm, y = availability_365)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~room_type) +
  xlim(0, 365) +
  labs(
    x = "Number of reviews last twelve months",
    y = "Availability per year",
    title = "Availability against reviews in last twelve months across room type",
    subtitle = "plot 16"
  )
# Map of all listings, coloured by room type.
#
# Colours are pinned to explicit levels so the assignment does not depend on
# how colorFactor() sorts a character domain. The previous domain listed
# "shared room" in lower case and had no hotel entry, so those listings fell
# outside the colour scale.
# NOTE(review): level spellings assume room_type uses "Shared room" and
# "Hotel room" - confirm against unique(df$room_type).
room_type_colours <- c(
  "Entire home/apt" = "navy",
  "Private room" = "red",
  "Shared room" = "black",
  "Hotel room" = "darkgreen"
)
pal <- colorFactor(
  palette = unname(room_type_colours),
  domain = NULL,
  levels = names(room_type_colours)
)

# Centre the view on the mean coordinate of the listings. The lng/lat columns
# are named explicitly rather than left for leaflet to guess.
map <- leaflet() %>%
  addTiles() %>%
  setView(
    lng = mean(df$longitude),
    lat = mean(df$latitude),
    zoom = 10
  ) %>%
  addCircleMarkers(
    data = df,
    lng = ~longitude,
    lat = ~latitude,
    radius = 0.1,
    color = ~pal(room_type)
  )
map
# Map of all listings, coloured by yearly-availability band.
#
# include.lowest = TRUE keeps listings with availability_365 == 0 in the
# '<100' band; with cut()'s default half-open intervals they became NA.
aval_labels <- c('<100', '100-200', '>200')
df$aval_cat <- cut(
  df$availability_365,
  breaks = c(0, 100, 200, 400),
  labels = aval_labels,
  include.lowest = TRUE
)

# Pin the level order so yellow/orange/red follow low/mid/high. A character
# domain is sorted by colorFactor(), which made the colour assignment depend
# on how the labels happen to collate.
pal <- colorFactor(
  palette = c("yellow", "orange", "red"),
  domain = NULL,
  levels = aval_labels
)

map <- leaflet() %>%
  addTiles() %>%
  setView(
    lng = mean(df$longitude),
    lat = mean(df$latitude),
    zoom = 10
  ) %>%
  addCircleMarkers(
    data = df,
    lng = ~longitude,
    lat = ~latitude,
    radius = 0.1,
    color = ~pal(aval_cat)
  )
map
# Map of all listings, coloured by price band.
#
# The top band is open-ended (Inf) so prices above 1000 are labelled '>200'
# instead of becoming NA, and include.lowest = TRUE keeps a price of 0 in the
# '<100' band.
price_labels <- c('<100', '100-200', '>200')
df$price_cat <- cut(
  df$price,
  breaks = c(0, 100, 200, Inf),
  labels = price_labels,
  include.lowest = TRUE
)

# Pin the level order so yellow/orange/red follow low/mid/high. A character
# domain is sorted by colorFactor(), which made the colour assignment depend
# on how the labels happen to collate.
pal <- colorFactor(
  palette = c("yellow", "orange", "red"),
  domain = NULL,
  levels = price_labels
)

map <- leaflet() %>%
  addTiles() %>%
  setView(
    lng = mean(df$longitude),
    lat = mean(df$latitude),
    zoom = 10
  ) %>%
  addCircleMarkers(
    data = df,
    lng = ~longitude,
    lat = ~latitude,
    radius = 0.1,
    color = ~pal(price_cat)
  )
map
To explore the latitude and longitude, we felt it was best to plot the listings on a map, with the points coloured by the different levels of each column. We can see that there is a cluster where private rooms are concentrated, and it would be interesting to see what makes that area more appealing for private rooms as opposed to entire homes. It is also very clear from the map that there are clusters of red for the price around Manhattan, confirming what we established from the graphs: that it is the most expensive area. We can also see some yellow clusters indicating areas with lower prices, and looking in more detail we can see that the listings near the airports tend to be cheaper.